## CLEANING DE ##
setwd("C:/Users/yuxua/Documents/careers/DEL intern/data deposit")
library(haven)
library(dplyr)
library(janitor)

DE_W1 <- read_sav("C:/Users/yuxua/Documents/careers/DEL intern/data deposit/UCL_DEL_Panel_Germany_Wave1_client.sav")
DE_W2 <- read_sav("C:/Users/yuxua/Documents/careers/DEL intern/data deposit/UCL_DEL_Panel_Germany_Wave2_client.sav")
DE_W3 <- read_sav("C:/Users/yuxua/Documents/careers/DEL intern/data deposit/UCL_DEL_Panel_Germany_Wave3_client_DELseg.sav")
DE_W4 <- read_sav("C:/Users/yuxua/Documents/careers/DEL intern/data deposit/UCL_DEL_Panel_Germany_Wave4_client.sav")
library(haven)
DE_W5 <- read_sav("UCL_DEL_Panel_Germany_Wave5_client.sav")

#### update DE_W1
names(DE_W1)
# all missing variables in

#rename variables
DE_W1 <- DE_W1 %>% 
  rename(nielsenregion = region_nielsen,
         religion = reli,
         income = household_income,
         education = educ_neu,
         ben1_w1 = ben1,
         ben1_w1_1 = ben1_1,
         ben1_w1_2 = ben1_2,
         ben1_w1_3 = ben1_3,
         ben1_w1_4 = ben1_4,
         cost1_w1 = cost1,
         cost1_w1_1 = cost1_1,
         cost1_w1_2 = cost1_2,
         cost1_w1_3 = cost1_3,
         cost1_w1_4 = cost1_4) 

#recode
table(DE_W1$purpose_w1) #1-12, recode to 0-11
table(DE_W1$cost5_w1) #1-12, recode to 0-11
table(DE_W1$trust1_w1) #0-11
table(DE_W1$trust2_w1) #0-11
table(DE_W1$trust3_w1) #0-11
table(DE_W1$dem1_w1) #1-12, recode to 0 to 11

#subtracting the variables
DE_W1 <- DE_W1 %>%
  mutate(purpose_w1 = purpose_w1-1)
DE_W1 <- DE_W1 %>%
  mutate(cost5_w1 = cost5_w1-1)
DE_W1 <- DE_W1 %>%
  mutate(dem1_w1 = dem1_w1-1)

table(DE_W1$track8_w1_1) #0-10, 12, needs to be recoded
table(DE_W1$track8_w1_2) #0-10, 12, needs to be recoded
table(DE_W1$track8_w1_3) #0-10, 12, needs to be recoded
table(DE_W1$track8_w1_4) #0-10, 12, needs to be recoded
table(DE_W1$track8_w1_5) #0-10, 12, needs to be recoded
table(DE_W1$track9_w1) #0-10, 12, needs to be recoded
table(DE_W1$track10_w1) #0-10, 12, needs to be recoded

#turn to numeric before recoding 12 to 11
DE_W1$track8_w1_1 <- as.numeric(DE_W1$track8_w1_1)
DE_W1$track8_w1_2 <- as.numeric(DE_W1$track8_w1_2)
DE_W1$track8_w1_3 <- as.numeric(DE_W1$track8_w1_3)
DE_W1$track8_w1_4 <- as.numeric(DE_W1$track8_w1_4)
DE_W1$track8_w1_5 <- as.numeric(DE_W1$track8_w1_5)
DE_W1$track9_w1 <- as.numeric(DE_W1$track9_w1)
DE_W1$track10_w1 <- as.numeric(DE_W1$track10_w1)

DE_W1 <- DE_W1 %>%
  mutate(track8_w1_1 = dplyr::recode(track8_w1_1, '12'=11))
DE_W1 <- DE_W1 %>%
  mutate(track8_w1_2 = dplyr::recode(track8_w1_2, '12'=11))
DE_W1 <- DE_W1 %>%
  mutate(track8_w1_3 = dplyr::recode(track8_w1_3, '12'=11))
DE_W1 <- DE_W1 %>%
  mutate(track8_w1_4 = dplyr::recode(track8_w1_4, '12'=11))
DE_W1 <- DE_W1 %>%
  mutate(track8_w1_5 = dplyr::recode(track8_w1_5, '12'=11))
DE_W1 <- DE_W1 %>%
  mutate(track9_w1 = dplyr::recode(track9_w1, '12'=11))
DE_W1 <- DE_W1 %>%
  mutate(track10_w1 = dplyr::recode(track10_w1, '12'=11))

# after checking excel
table(DE_W1$track6_w1) #0-10, 12, recode 12 to 11
table(DE_W1$dnut_w1) #1-12, to recode to 0-11

DE_W1$track6_w1 <- as.numeric(DE_W1$track6_w1)
DE_W1 <- DE_W1 %>%
  mutate(track6_w1 = dplyr::recode(track6_w1, '12'=11))

DE_W1 <- DE_W1 %>%
  mutate(dnut_w1 = dnut_w1-1)

write_sav(DE_W1, "UCL_DEL_Panel_Germany_Wave1_client_new.sav")


#### update DE_W2
names(DE_W2)
# all missing variables in

#rename variables
DE_W2 <- DE_W2 %>% 
  rename(religion = reli,
         income = hinc,
         education = educ_neu,
         limits_w2 = limit_w2) 

#recode
table(DE_W2$purpose_w2) #1-12, recode to 0-11
table(DE_W2$cost5_w2) #1-12, recode to 0-11
table(DE_W2$trust1_w2) #0-11
table(DE_W2$trust2_w2) #0-11
table(DE_W2$trust3_w2) #0-11
table(DE_W2$dem1_w2) #1-12, recode to 0 to 11
table(DE_W2$dnut_w2) #1-12, recode to 0 to 11

#subtracting the variables
DE_W2 <- DE_W2 %>%
  mutate(purpose_w2 = purpose_w2-1)
DE_W2 <- DE_W2 %>%
  mutate(cost5_w2 = cost5_w2-1)
DE_W2 <- DE_W2 %>%
  mutate(dem1_w2 = dem1_w2-1)
DE_W2 <- DE_W2 %>%
  mutate(dnut_w2 = dnut_w2-1)

table(DE_W2$track6_w2) #0-10, 12, recode 12 to 11
table(DE_W2$track8_w2_1) #0-10, 12, needs to be recoded
table(DE_W2$track8_w2_2) #0-10, 12, needs to be recoded
table(DE_W2$track8_w2_3) #0-10, 12, needs to be recoded
table(DE_W2$track8_w2_4) #0-10, 12, needs to be recoded
table(DE_W2$track8_w2_5) #0-10, 12, needs to be recoded
table(DE_W2$track9_w2) #0-10, 12, needs to be recoded
table(DE_W2$track10_w2) #0-10, 12, needs to be recoded

#turn to numeric before recoding 12 to 11
DE_W2$track6_w2 <- as.numeric(DE_W2$track6_w2)
DE_W2$track8_w2_1 <- as.numeric(DE_W2$track8_w2_1)
DE_W2$track8_w2_2 <- as.numeric(DE_W2$track8_w2_2)
DE_W2$track8_w2_3 <- as.numeric(DE_W2$track8_w2_3)
DE_W2$track8_w2_4 <- as.numeric(DE_W2$track8_w2_4)
DE_W2$track8_w2_5 <- as.numeric(DE_W2$track8_w2_5)
DE_W2$track9_w2 <- as.numeric(DE_W2$track9_w2)
DE_W2$track10_w2 <- as.numeric(DE_W2$track10_w2)

DE_W2 <- DE_W2 %>%
  mutate(track6_w2 = dplyr::recode(track6_w2, '12'=11))
DE_W2 <- DE_W2 %>%
  mutate(track8_w2_1 = dplyr::recode(track8_w2_1, '12'=11))
DE_W2 <- DE_W2 %>%
  mutate(track8_w2_2 = dplyr::recode(track8_w2_2, '12'=11))
DE_W2 <- DE_W2 %>%
  mutate(track8_w2_3 = dplyr::recode(track8_w2_3, '12'=11))
DE_W2 <- DE_W2 %>%
  mutate(track8_w2_4 = dplyr::recode(track8_w2_4, '12'=11))
DE_W2 <- DE_W2 %>%
  mutate(track8_w2_5 = dplyr::recode(track8_w2_5, '12'=11))
DE_W2 <- DE_W2 %>%
  mutate(track9_w2 = dplyr::recode(track9_w2, '12'=11))
DE_W2 <- DE_W2 %>%
  mutate(track10_w2 = dplyr::recode(track10_w2, '12'=11))

# after checking codebook
table(DE_W2$tradeoff_w2) #1-12, recode to 1 to 11
table(DE_W2$goal1_w2) #1-12, recode to 1 to 11
table(DE_W2$goal2_w2) #1-12, recode to 1 to 11

DE_W2 <- DE_W2 %>%
  mutate(tradeoff_w2 = tradeoff_w2-1)
DE_W2 <- DE_W2 %>%
  mutate(goal1_w2 = goal1_w2-1)
DE_W2 <- DE_W2 %>%
  mutate(goal2_w2 = goal2_w2-1)

write_sav(DE_W2, "UCL_DEL_Panel_Germany_Wave2_client_new.sav")


#### update DE_W3
names(DE_W3)
# all missing variables in

#rename variables
DE_W3 <- DE_W3 %>% 
  rename(religion = reli,
         income = hinc,
         education = educ_neu) 

table(DE_W3$income) #recode 13 to 777
DE_W3$income <- as.numeric(DE_W3$income)
DE_W3 <- DE_W3 %>%
  mutate(income = dplyr::recode(income, '13'=777))

#recode
table(DE_W3$purpose_w3) #1-12, recode to 0-11
table(DE_W3$cost5_w3) #1-12, recode to 0-11
table(DE_W3$trust1_w3) #1-12, recode to 0-11
table(DE_W3$trust2_w3) #1-12, recode to 0-11
table(DE_W3$trust3_w3) #1-12, recode to 0-11
table(DE_W3$dem1_w3) #1-12, recode to 0 to 11
table(DE_W3$dnut_w3) #1-12, recode to 0 to 11

#subtracting the variables
DE_W3 <- DE_W3 %>%
  mutate(purpose_w3 = purpose_w3-1)
DE_W3 <- DE_W3 %>%
  mutate(cost5_w3 = cost5_w3-1)
DE_W3 <- DE_W3 %>%
  mutate(trust1_w3 = trust1_w3-1)
DE_W3 <- DE_W3 %>%
  mutate(trust2_w3 = trust2_w3-1)
DE_W3 <- DE_W3 %>%
  mutate(trust3_w3 = trust3_w3-1)
DE_W3 <- DE_W3 %>%
  mutate(dem1_w3 = dem1_w3-1)
DE_W3 <- DE_W3 %>%
  mutate(dnut_w3 = dnut_w3-1)

table(DE_W3$track6_w3) #1-12, recode to 0-11
table(DE_W3$track8_w3_1) #1-12, recode to 0-11
table(DE_W3$track8_w3_2) #1-12, recode to 0-11
table(DE_W3$track8_w3_3) #1-12, recode to 0-11
table(DE_W3$track8_w3_4) #1-12, recode to 0-11
table(DE_W3$track8_w3_5) #1-12, recode to 0-11
table(DE_W3$track9_w3) #1-12, recode to 0-11
table(DE_W3$track10_w3) #1-12, recode to 0-11

DE_W3 <- DE_W3 %>%
  mutate(track6_w3 = track6_w3-1)
DE_W3 <- DE_W3 %>%
  mutate(track8_w3_1 = track8_w3_1-1)
DE_W3 <- DE_W3 %>%
  mutate(track8_w3_2 = track8_w3_2-1)
DE_W3 <- DE_W3 %>%
  mutate(track8_w3_3 = track8_w3_3-1)
DE_W3 <- DE_W3 %>%
  mutate(track8_w3_4 = track8_w3_4-1)
DE_W3 <- DE_W3 %>%
  mutate(track8_w3_5 = track8_w3_5-1)
DE_W3 <- DE_W3 %>%
  mutate(track9_w3 = track9_w3-1)
DE_W3 <- DE_W3 %>%
  mutate(track10_w3 = track10_w3-1)

write_sav(DE_W3, "UCL_DEL_Panel_Germany_Wave3_client_DELseg_new.sav")


#### update DE_W4
DE_W4 <- read_sav("UCL_DEL_Panel_Germany_Wave4_client_new.sav")
names(DE_W4)
# no missing variables
table(DE_W4$education)
#rename variables
DE_W4 <- DE_W4 %>% 
  rename(religion = reli,
         income = hinc,
         education = educ_neu) 

#recode
table(DE_W4$purpose_w4) #1-12, recode to 0-11
table(DE_W4$cost5_w4) #1-12, recode to 0-11
table(DE_W4$trust1_w4) #0-11
table(DE_W4$trust2_w4) #0-11
table(DE_W4$trust3_w4) #0-11
table(DE_W4$dem1_w4) #1-12, recode to 0 to 11
table(DE_W4$dnut_w4) #1-12, recode to 0 to 11

#subtracting the variables
DE_W4 <- DE_W4 %>%
  mutate(purpose_w4 = purpose_w4-1)
DE_W4 <- DE_W4 %>%
  mutate(cost5_w4 = cost5_w4-1)
DE_W4 <- DE_W4 %>%
  mutate(dem1_w4 = dem1_w4-1)
DE_W4 <- DE_W4 %>%
  mutate(dnut_w4 = dnut_w4-1)

table(DE_W4$track6_w4) #0-10, 12, recode 12 to 11
table(DE_W4$track8_w4_1) #0-10, 12, needs to be recoded
table(DE_W4$track8_w4_2) #0-10, 12, needs to be recoded
table(DE_W4$track8_w4_3) #0-10, 12, needs to be recoded
table(DE_W4$track8_w4_4) #0-10, 12, needs to be recoded
table(DE_W4$track8_w4_5) #0-10, 12, needs to be recoded
table(DE_W4$track9_w4) #0-10, 12, needs to be recoded
table(DE_W4$track10_w4) #0-10, 12, needs to be recoded

#turn to numeric before recoding 12 to 11
DE_W4$track6_w4 <- as.numeric(DE_W4$track6_w4)
DE_W4$track8_w4_1 <- as.numeric(DE_W4$track8_w4_1)
DE_W4$track8_w4_2 <- as.numeric(DE_W4$track8_w4_2)
DE_W4$track8_w4_3 <- as.numeric(DE_W4$track8_w4_3)
DE_W4$track8_w4_4 <- as.numeric(DE_W4$track8_w4_4)
DE_W4$track8_w4_5 <- as.numeric(DE_W4$track8_w4_5)
DE_W4$track9_w4 <- as.numeric(DE_W4$track9_w4)
DE_W4$track10_w4 <- as.numeric(DE_W4$track10_w4)

DE_W4 <- DE_W4 %>%
  mutate(track6_w4 = dplyr::recode(track6_w4, '12'=11))
DE_W4 <- DE_W4 %>%
  mutate(track8_w4_1 = dplyr::recode(track8_w4_1, '12'=11))
DE_W4 <- DE_W4 %>%
  mutate(track8_w4_2 = dplyr::recode(track8_w4_2, '12'=11))
DE_W4 <- DE_W4 %>%
  mutate(track8_w4_3 = dplyr::recode(track8_w4_3, '12'=11))
DE_W4 <- DE_W4 %>%
  mutate(track8_w4_4 = dplyr::recode(track8_w4_4, '12'=11))
DE_W4 <- DE_W4 %>%
  mutate(track8_w4_5 = dplyr::recode(track8_w4_5, '12'=11))
DE_W4 <- DE_W4 %>%
  mutate(track9_w4 = dplyr::recode(track9_w4, '12'=11))
DE_W4 <- DE_W4 %>%
  mutate(track10_w4 = dplyr::recode(track10_w4, '12'=11))

table(DE_W4$DELseg)

write_sav(DE_W4, "DEL_Panel_Germany_Wave_4_2022_Data.sav")


#### update DE_W5

names(DE_W5)
# no missing variables
table(DE_W5$edu)
#rename variables
DE_W5 <- DE_W5 %>% 
  rename(religion = reli,
         income = hinc,
         education = educ_neu) 

table(DE_W5$education)

#recode
table(DE_W5$purpose_w5) #1-12, recode to 0-11
table(DE_W5$cost5_w5) #1-12, recode to 0-11
table(DE_W5$trust1_w5) #0-11
table(DE_W5$trust2_w5) #0-11
table(DE_W5$trust3_w5) #0-11
table(DE_W5$dem1_w5) #1-12, recode to 0 to 11
table(DE_W5$dnut_w5) #1-12, recode to 0 to 11

#subtracting the variables
table(DE_W5$purpose_w5) #1-12, recode to 0 to 11

DE_W5 <- DE_W5 %>%
  mutate(purpose_w5 = purpose_w5-1)

table(DE_W5$cost5_w5) #1-12, recode to 0 to 11

DE_W5 <- DE_W5 %>%
  mutate(cost5_w5 = cost5_w5-1)

table(DE_W5$dem1_w5) #1-12, recode to 0 to 11

DE_W5 <- DE_W5 %>%
  mutate(dem1_w5 = dem1_w5-1)

table(DE_W5$dnut_w5) # MISSING

table(DE_W5$track6_w5) #0-10, 12, recode 12 to 11
table(DE_W5$track8_w5_1) #0-10, 12, needs to be recoded
table(DE_W5$track8_w5_2) #0-10, 12, needs to be recoded
table(DE_W5$track8_w5_3) #0-10, 12, needs to be recoded
table(DE_W5$track8_w5_4) #0-10, 12, needs to be recoded
table(DE_W5$track8_w5_5) #0-10, 12, needs to be recoded
table(DE_W5$track9_w5) #0-10, 12, needs to be recoded
table(DE_W5$track10_w5) #0-10, 12, needs to be recoded

#turn to numeric before recoding 12 to 11
DE_W5$track6_w5 <- as.numeric(DE_W5$track6_w5)
DE_W5$track8_w5_1 <- as.numeric(DE_W5$track8_w5_1)
DE_W5$track8_w5_2 <- as.numeric(DE_W5$track8_w5_2)
DE_W5$track8_w5_3 <- as.numeric(DE_W5$track8_w5_3)
DE_W5$track8_w5_4 <- as.numeric(DE_W5$track8_w5_4)
DE_W5$track8_w5_5 <- as.numeric(DE_W5$track8_w5_5)
DE_W5$track9_w5 <- as.numeric(DE_W5$track9_w5)
DE_W5$track10_w5 <- as.numeric(DE_W5$track10_w5)

DE_W5 <- DE_W5 %>%
  mutate(track6_w5 = dplyr::recode(track6_w5, '12'=11))
DE_W5 <- DE_W5 %>%
  mutate(track8_w5_1 = dplyr::recode(track8_w5_1, '12'=11))
DE_W5 <- DE_W5 %>%
  mutate(track8_w5_2 = dplyr::recode(track8_w5_2, '12'=11))
DE_W5 <- DE_W5 %>%
  mutate(track8_w5_3 = dplyr::recode(track8_w5_3, '12'=11))
DE_W5 <- DE_W5 %>%
  mutate(track8_w5_4 = dplyr::recode(track8_w5_4, '12'=11))
DE_W5 <- DE_W5 %>%
  mutate(track8_w5_5 = dplyr::recode(track8_w5_5, '12'=11))
DE_W5 <- DE_W5 %>%
  mutate(track9_w5 = dplyr::recode(track9_w5, '12'=11))
DE_W5 <- DE_W5 %>%
  mutate(track10_w5 = dplyr::recode(track10_w5, '12'=11))

DE_W5 <- DE_W5 %>%
  rename(WeightPanelW5 = weight)

table(DE_W5$DELseg)

write_sav(DE_W5, "DEL_Panel_Germany_Wave_5_2023_Data.sav")

#####################################################################
# checking names

names(DE_W1)
names(DE_W2)
names(DE_W3)
names(DE_W4)


# comparing dataframes for DE
DE_W1_comp <- DE_W1 %>% 
  select(GUNQID:cos1_w1_4)
DE_W2_comp <- DE_W2 %>% 
  select(GUNQID:cos1_w2_4)
DE_W3_comp <- DE_W3 %>% 
  select(GUNQID:cos1_w3_4)
DE_W4_comp <- DE_W4 %>% 
  select(GUNQID:cos1_w4_4)

DE_W1_comp <- DE_W1_comp %>%
  rename_with(~paste0(sub("_w1*", "", .)), -1)
DE_W2_comp <- DE_W2_comp %>%
  rename_with(~paste0(sub("_w2*", "", .)), -1)
DE_W3_comp <- DE_W3_comp %>%
  rename_with(~paste0(sub("_w3*", "", .)), -1)
DE_W4_comp <- DE_W4_comp %>%
  rename_with(~paste0(sub("_w4*", "", .)), -1)

data.comp_DE <- compare_df_cols(DE_W1_comp, DE_W2_comp, DE_W3_comp, DE_W4_comp)